2

Code:

'''
Created on 2018年2月11日

python 3.6

@author: Livon

'''

import urllib.request

import re

# Page to scrape: the gupiao.baidu.com entry for stock sz002633.
url = 'https://gupiao.baidu.com/stock/sz002633.html'

print( 'url: ' + url )

# Fetch the page. The context manager closes the HTTP response even when
# reading or decoding raises, so the connection is never leaked.
with urllib.request.urlopen( url ) as htmlResponse:
    html = htmlResponse.read().decode('utf8')

# Capture the figure that sits between the "总市值" label and the "亿" unit.
# Raw strings keep regex escapes from being parsed as (invalid) string escapes.
marketCaps = re.findall(r"<dl><dt>总市值</dt><dd>(.*?)亿</dd></dl>", html)
# The trailing \s+ swallows the whitespace (including the line break) that
# precedes </span>, so the captured text carries no trailing blanks.
dates = re.findall(r'<span class="state f-up">(.*?)\s+</span>', html)

for marketCap in marketCaps:
    print( '总市值:' + marketCap + ' 亿' )

for rawDate in dates:
    # Drop the HTML non-breaking-space entities embedded in the captured text.
    date = rawDate.replace( "&nbsp;", "" )
    print('时间:' + date )
        

代码摘选

  • html = html.decode('utf8') # 转码
  • dates = re.findall('<span class="state f-up">(.*?)\s+</span>',html) # 匹配换行
  • print('时间:', end='' ) # 不换行
  • date = re.sub( "&nbsp;", "", dates[i] ) # 正则替换

Output:

url: https://gupiao.baidu.com/stock/sz002633.html
总市值:12.69 亿
时间:已休市 2018-02-09 15:00:03

风中之枫
27 声望2 粉丝